Skip to content

Commit c055b04

Browse files
Prewarm LLM cache (#6692)
* Prewarm LLM cache

* pre-warm LLM cache only if project defined

---------

Co-authored-by: Ryan Johnson <rjohnson@mozilla.com>
1 parent ed7c12d commit c055b04

File tree

5 files changed

+14
-9
lines changed

5 files changed

+14
-9
lines changed

.env-test

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -22,5 +22,3 @@ REUSE_DB=0
 ENABLE_ADMIN=True
 SET_LOCALE_PATH=False
 SECURE_SSL_REDIRECT=False
-GOOGLE_APPLICATION_CREDENTIALS=creds
-GOOGLE_CLOUD_PROJECT=sumo-test

kitsune/llm/apps.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,14 @@
 from django.apps import AppConfig
+from django.conf import settings


 class LLMConfig(AppConfig):
     name = "kitsune.llm"
     default_auto_field = "django.db.models.AutoField"
+
+    def ready(self):
+        from kitsune.llm.utils import get_llm
+
+        if settings.GOOGLE_CLOUD_PROJECT:
+            # pre-warm the LLM cache
+            get_llm()

kitsune/llm/questions/classifiers.py

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,6 @@
 from kitsune.llm.utils import get_llm
 from kitsune.products.utils import get_taxonomy

-DEFAULT_LLM_MODEL = "gemini-2.5-flash-preview-04-17"
 HIGH_CONFIDENCE_THRESHOLD = 75
 LOW_CONFIDENCE_THRESHOLD = 60

@@ -26,7 +25,7 @@ def classify_question(question: "Question") -> dict[str, Any]:
     Analyze a question for spam and, if not spam or low confidence, classify the topic.
     Returns a dict with keys: action, spam_result, topic_result (optional).
     """
-    llm = get_llm(model_name=DEFAULT_LLM_MODEL)
+    llm = get_llm()

     product = question.product
     payload: dict[str, Any] = {

kitsune/llm/utils.py

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,13 @@
-from functools import cache
+from functools import lru_cache

 from langchain.chat_models.base import BaseChatModel

+DEFAULT_LLM_MODEL = "gemini-2.5-flash-preview-04-17"

-@cache
+
+@lru_cache(maxsize=1)
 def get_llm(
-    model_name: str,
+    model_name: str = DEFAULT_LLM_MODEL,
     temperature: int = 1,
     max_tokens: int | None = None,
     max_retries: int = 2,

kitsune/settings.py

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1337,8 +1337,6 @@ def filter_exceptions(event, hint):

 USER_INACTIVITY_DAYS = config("USER_INACTIVITY_DAYS", default=1095, cast=int)

-if DEV:
-    GOOGLE_APPLICATION_CREDENTIALS = config("GOOGLE_APPLICATION_CREDENTIALS", default="")
 GOOGLE_CLOUD_PROJECT = config("GOOGLE_CLOUD_PROJECT", default="")


 # shell_plus conf

0 commit comments

Comments (0)